import os
from IPython.display import display, HTML
# Folder that holds this notebook; a later cell changes back into it before
# exporting the notebook to HTML.
NBdir='C:\\Users\\Gamaliel\\Documents\\G\\ADD\\IBM_DS\\Visualization\\'
# Folder with the saved course pages that are rendered below.
ModuleFolder='C:\\Users\\Gamaliel\\Documents\\G\\ADD\\IBM_DS\\Visualization\\M01\\IBMs\\HTML\\'
# Keep the module folder as the working directory for the rest of this cell.
os.chdir(ModuleFolder)

# Render every .html/.htm page found in the module folder inline in the
# notebook.  The listing is sorted case-insensitively so the pages always
# appear in the same order; anything that is not an HTML page is skipped.
for page in sorted(os.listdir(ModuleFolder), key=str.lower):
    if page.lower().endswith(('.html', '.htm')):
        # Pass the path as ``filename=`` so IPython reads the file itself
        # instead of guessing whether the string is markup or a path.
        display(HTML(filename=os.path.join(ModuleFolder, page)))
Data Visualization with Python
Cheat Sheet : Data Preprocessing Tasks in Pandas
Task |
Syntax | Description |
Example |
|---|---|---|---|
| Load CSV data | pd.read_csv('filename.csv') |
Read data from a CSV file into a Pandas DataFrame | df_can=pd.read_csv('data.csv') |
| Handling Missing Values | df.dropna() |
Drop rows with missing values | df_can.dropna() |
df.fillna(value) |
Fill missing values with a specified value | df_can.fillna(0) |
|
| Removing Duplicates | df.drop_duplicates() |
Remove duplicate rows | df_can.drop_duplicates() |
| Renaming Columns | df.rename(columns={'old_name': 'new_name'}) |
Rename one or more columns | df_can.rename(columns={'Age': 'Years'}) |
| Selecting Columns | df['column_name'] or df.column_name |
Select a single column | df_can.Age or df_can['Age'] |
df[['col1', 'col2']] |
Select multiple columns | df_can[['Name', 'Age']] |
|
| Filtering Rows | df[df['column'] > value] |
Filter rows based on a condition | df_can[df_can['Age'] > 30] |
| Applying Functions to Columns | df['column'].apply(function_name) |
Apply a function to transform values in a column | df_can['Age'].apply(lambda x: x + 1) |
| Creating New Columns | df['new_column'] = expression |
Create a new column with values derived from existing ones | df_can['Total'] = df_can['Quantity'] * df_can['Price'] |
| Grouping and Aggregating | df.groupby('column').agg({'col1': 'sum', 'col2': 'mean'}) |
Group rows by a column and apply aggregate functions | df_can.groupby('Category').agg({'Total': 'mean'}) |
| Sorting Rows | df.sort_values('column', ascending=True/False) |
Sort rows based on a column | df_can.sort_values('Date', ascending=True) |
| Displaying First n Rows | df.head(n) |
Show the first n rows of the DataFrame | df_can.head(3) |
| Displaying Last n Rows | df.tail(n) |
Show the last n rows of the DataFrame | df_can.tail(3) |
| Checking for Null Values | df.isnull() |
Check for null values in the DataFrame | df_can.isnull() |
| Selecting Rows by Index | df.iloc[index] |
Select rows based on integer index | df_can.iloc[3] |
df.iloc[start:end] |
Select rows in a specified range | df_can.iloc[2:5] |
|
| Selecting Rows by Label | df.loc[label] |
Select rows based on label/index name | df_can.loc['Label'] |
df.loc[start:end] |
Select rows in a specified label/index range | df_can.loc['Age':'Quantity'] |
|
| Summary Statistics | df.describe() |
Generates descriptive statistics for numerical columns | df_can.describe() |
Cheat Sheet : Plot Libraries
| Library | Main Purpose | Key Features | Programming Language | Level of Customization | Dashboard Capabilities | Types of Plots Possible |
|---|---|---|---|---|---|---|
| Matplotlib | General-purpose plotting | Comprehensive plot types and variety of customization options | Python | High | Requires additional components and customization | Line plots, scatter plots, bar charts, histograms, pie charts, box plots, heatmaps, etc. |
| Pandas | Fundamentally used for data manipulation but also has plotting functionality | Easy to plot directly on Pandas data structures | Python | Medium | Can be combined with web frameworks for creating dashboards | Line plots, scatter plots, bar charts, histograms, pie charts, box plots, etc. |
| Seaborn | Statistical data visualization | Stylish, specialized statistical plot types | Python | Medium | Can be combined with other libraries to display plots on dashboards | Heatmaps, violin plots, scatter plots, bar plots, count plots, etc. |
| Plotly | Interactive data visualization | Interactive web-based visualizations | Python, R, JavaScript | High | Dash framework is dedicated for building interactive dashboards | Line plots, scatter plots, bar charts, pie charts, 3D plots, choropleth maps, etc. |
| Folium | Geospatial data visualization | Interactive, customizable maps | Python | Medium | For incorporating maps into dashboards, it can be integrated with other frameworks/libraries | Choropleth maps, point maps, heatmaps, etc. |
| PyWaffle | Plotting Waffle charts | Waffle charts | Python | Low | Can be combined with other libraries to display waffle chart on dashboards | Waffle charts, square pie charts, donut charts, etc. |
# Export this notebook to HTML with the custom "pj" nbconvert template.
import subprocess

# nbconvert is given a bare notebook name, so run it from the notebook folder.
os.chdir(NBdir)
try:
    # check=True turns a non-zero exit status into CalledProcessError, so the
    # failure branch below is actually reachable (a ``!`` shell escape never
    # raises).  Output is captured and printed so it still shows in the cell.
    completed = subprocess.run(
        ['jupyter', 'nbconvert', 'Vis_HTMLs.ipynb',
         '--to', 'html', '--template', 'pj'],
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    print(completed.stdout)
except subprocess.CalledProcessError as e:
    # nbconvert ran but reported an error; show what it printed.
    print(e.output)
    print('HTML not stored')
except OSError as e:
    # The jupyter executable could not be started at all.
    print('HTML not stored')
import shutil
import os
import shutil
# Source folder, destination folder and the two files to publish.
# NOTE(review): FromFld is not the NBdir folder that nbconvert is run from
# earlier in this notebook -- confirm the exported files really land here.
FromFld='C:\\Users\\Gamaliel\\Documents\\G\\ADD\\IBM_DS\\Visualization\\IBMs\\HTML'
Tofld='C:\\Users\\Gamaliel\\Documents\\G\\ADD\\IBM_DS\\IBM_DS_Jupyter_Tasks\\Python4DataScience\\'
HTML_Notes='Vis_HTMLs.html'
Jupyter_Notes='Vis_HTMLs.ipynb'


def transfer_note(name, src_dir, dst_dir, transfer, replaced_verb, new_verb):
    """Move or copy ``name`` from ``src_dir`` into ``dst_dir``.

    Args:
        name: File name, identical in both folders.
        src_dir: Folder the file is taken from.
        dst_dir: Folder the file is placed in.
        transfer: Callable ``transfer(src, dst)`` doing the work, e.g.
            ``shutil.move`` or ``shutil.copy``.
        replaced_verb: Text printed when an older copy was replaced.
        new_verb: Text printed when no older copy existed.

    Raises:
        FileNotFoundError: If the source file does not exist.  This is
            checked before anything is deleted, so an existing destination
            copy is never lost because the source is missing.
        OSError: Whatever ``os.remove`` or ``transfer`` raises.
    """
    source = os.path.join(src_dir, name)
    target = os.path.join(dst_dir, name)
    if not os.path.isfile(source):
        raise FileNotFoundError('source file not found: ' + source)

    had_previous = os.path.isfile(target)
    if had_previous:
        os.remove(target)
        print(name, 'deleted in', dst_dir)
    transfer(source, target)
    print(name, replaced_verb if had_previous else new_verb, dst_dir)


# HTML export: moved, so it no longer remains in the source folder.
try:
    transfer_note(HTML_Notes, FromFld, Tofld, shutil.move, 'replaced in', 'written in')
except OSError as e:
    print('HTML not moved:', e)
# NB: copied, so the working notebook stays where it is.
try:
    transfer_note(Jupyter_Notes, FromFld, Tofld, shutil.copy, 'copied in', 'copied in')
except OSError as e:
    print('NB not moved:', e)
Vis_py.html deleted in C:\Users\Gamaliel\Documents\G\ADD\IBM_DS\IBM_DS_Jupyter_Tasks\Python4DataScience\ Vis_py.html replaced in C:\Users\Gamaliel\Documents\G\ADD\IBM_DS\IBM_DS_Jupyter_Tasks\Python4DataScience\ Vis_py.ipynb deleted in C:\Users\Gamaliel\Documents\G\ADD\IBM_DS\IBM_DS_Jupyter_Tasks\Python4DataScience\ Vis_py.ipynb copied in C:\Users\Gamaliel\Documents\G\ADD\IBM_DS\IBM_DS_Jupyter_Tasks\Python4DataScience\